Libraries used in the analysis

library(DT)
library(ggplot2)
library(clusterProfiler)
library(org.Mm.eg.db)
library(org.Hs.eg.db)
library(pathview)
library(DOSE)
library(ggnewscale)
library(readxl)
library(ComplexHeatmap)
library(stringr)

Loading in the necessary data

PD1 vs Combo (PD1/Combo)

Differential Expression Data

RSEM was run on each sample. Using the RSEM-quantified gene expression, DESeq2 was run between arms. The fold change order is as follows: A_vs_B => numerator_vs_denominator. The following table is interactive and shows the differential expression results for PD1 vs combination therapy samples. The magnitude log2 fold change is very small towards PD1 and towards Combo.

Volcano Plots

gsea results on GO and KEGG terms

Gene-set enrichment analysis within the GO pathways show enrichment for ribosome pathways, protein synthesis and catalysis, innate and adaptive immune response, B-cell proliferation, an antiviral immune response, interferon production, cellular response to interferon, RNA translation, and RNA and mRNA binding.

untreated vs Combo (untreated/Combo)

Differential Expression Data

RSEM was run on each sample. Using the RSEM-quantified gene expression, DESeq2 was run between arms. The fold change order is as follows: A_vs_B => numerator_vs_denominator. The following table is interactive and shows the differential expression results for PD1 vs combination therapy samples. The magnitude log2 fold change is very small towards PD1 and towards Combo.

Volcano Plots

gsea results on GO and KEGG terms

Gene-set enrichment analysis within the GO pathways show enrichment for ribosome pathways, protein synthesis and catalysis, innate and adaptive immune response, B-cell proliferation, an antiviral immune response, interferon production, cellular response to interferon, RNA translation, and RNA and mRNA binding.

untreated vs pd1 (unctreate/pd1)

Differential Expression Data

RSEM was run on each sample. Using the RSEM-quantified gene expression, DESeq2 was run between arms. The fold change order is as follows: A_vs_B => numerator_vs_denominator. The following table is interactive and shows the differential expression results for PD1 vs combination therapy samples. The magnitude log2 fold change is very small towards PD1 and towards Combo.

Volcano Plots

gsea results on GO and KEGG terms

Gene-set enrichment analysis within the GO pathways show enrichment for ribosome pathways, protein synthesis and catalysis, innate and adaptive immune response, B-cell proliferation, an antiviral immune response, interferon production, cellular response to interferon, RNA translation, and RNA and mRNA binding.

Analyzing the mutation signatures

mut_file_1 <- "F:/Ali_data/WES_Data_for_4T1MIS/MB01JHU503/MB01JHU503_000_analysis/MAF/6433_NetMHCpan_mutation_peptides_1.xls.txt"
mut_file_2 <- "F:/Ali_data/WES_Data_for_4T1MIS/MB01JHU503/MB01JHU503_000_analysis/MAF/6080_NetMHCpan_mutation_peptides_5001.xls.txt"
mut_file_3 <- "F:/Ali_data/WES_Data_for_4T1MIS/MB01JHU503/MB01JHU503_000_analysis/MAF/31185_NetMHCpan_mutation_peptides_10001.xls.txt"

mut_file_1_dat <- read.table(mut_file_1,skip=2)
mut_file_2_dat <- read.table(mut_file_2,skip=2)
mut_file_3_dat <- read.table(mut_file_3,skip=2)

mut_file_data <- rbind(mut_file_1_dat,mut_file_2_dat,mut_file_3_dat)

rm(mut_file_1_dat,mut_file_2_dat,mut_file_3_dat)

maf_file <- "F:/Ali_data/WES_Data_for_4T1MIS/MB01JHU503/MB01JHU503_000_analysis/MAF/MAF_wsequences.maf"
maf_file_data <- read.table(maf_file,sep="\t",header=T)

maf_row_vals<-unname(vapply(mut_file_data$V3,function(val){as.numeric(strsplit(val,"_")[[1]][2])},numeric(1)))
mutation_genes_from_maf <- maf_file_data$Hugo_Symbol[maf_row_vals]
maf_data_all <- maf_file_data[maf_row_vals,]

binding_rank_locs <- c(7,11,15) # location of ranks from NET-MHC output
binders <- t(vapply(seq(nrow(mut_file_data)),function(val){
  ranks <- as.numeric(mut_file_data[val,binding_rank_locs])
  SB <- length(which(ranks<=0.5))
  WB <- length(which(ranks<=2 & ranks > 0.5))
  return(c(SB,WB))
},numeric(2)))
binders <- as.data.frame(binders)
colnames(binders)<-c("SB","WB")
binders$gene <- mutation_genes_from_maf
maf_data_all <- cbind(maf_data_all,binders)

all_genes <- unique(binders$gene)
binders_summary <- t(vapply(all_genes,function(gene){
  binders_small <- binders[binders$gene==gene,]
  SB <- sum(binders_small$SB)
  WB <- sum(binders_small$WB)
  return(c(SB,WB))
},numeric(2)))
binders_summary <- as.data.frame(binders_summary)
colnames(binders_summary) <- c("SB","WB")
binders_summary$gene <- all_genes
binders_summary <- binders_summary[binders_summary$SB > 0 | binders_summary$WB > 0,]

Handling Cell type deconvolutions

cell_deconv_file <- "F:/Ali_data/HTMBmice_RNAseq_Data/MB01JHU504/MB01JHU504_000_analysis/RSEM/tables/genes/TIMER2.0_estimation_matrix_TPM_20220628.csv"
cell_deconv_data <- read.table(cell_deconv_file,header=T,sep=",")
cibersort_rows <- seq(7,28)
cibersort_deconv <- cell_deconv_data[cibersort_rows,]
cibersort_deconv$cell_type <- vapply(cibersort_deconv$cell_type,function(val){strsplit(val,"_")[[1]][1]},character(1))
rownames(cibersort_deconv) <- cibersort_deconv$cell_type
cibersort_deconv <- cibersort_deconv[,seq(2,ncol(cibersort_deconv))]

cibersort_deconv_scaled <- as.data.frame(t(apply(cibersort_deconv[apply(cibersort_deconv,1,sd)>0,],1,scale)))
colnames(cibersort_deconv_scaled) <- colnames(cibersort_deconv)
Heatmap(cibersort_deconv_scaled,cluster_columns=F,cluster_rows=F,show_row_names=T,show_column_names = T,name="CIBERSORT:zscore")

CD8_proportions <- apply(cibersort_deconv[seq(4),,drop=F],2,sum)
names(CD8_proportions) <- colnames(cibersort_deconv)[seq(2,ncol(cibersort_deconv))]
CD4_proportions <- apply(cibersort_deconv[seq(5,7),,drop=F],2,sum)
names(CD4_proportions) <- colnames(cibersort_deconv)[seq(2,ncol(cibersort_deconv))]
TFH_proportions <- apply(cibersort_deconv[seq(8),,drop=F],2,sum)
names(TFH_proportions) <- colnames(cibersort_deconv)[seq(2,ncol(cibersort_deconv))]
TREG_proportions <- apply(cibersort_deconv[seq(10),,drop=F],2,sum)
names(TREG_proportions) <- colnames(cibersort_deconv)[seq(2,ncol(cibersort_deconv))]
TGD_proportions <- apply(cibersort_deconv[seq(11),,drop=F],2,sum)
names(TGD_proportions) <- colnames(cibersort_deconv)[seq(2,ncol(cibersort_deconv))]
M0_proportions <- apply(cibersort_deconv[seq(14),,drop=F],2,sum)
names(M0_proportions) <- colnames(cibersort_deconv)[seq(2,ncol(cibersort_deconv))]
M1_proportions <- apply(cibersort_deconv[seq(15),,drop=F],2,sum)
names(M1_proportions) <- colnames(cibersort_deconv)[seq(2,ncol(cibersort_deconv))]
M2_proportions <- apply(cibersort_deconv[seq(16),,drop=F],2,sum)
names(M2_proportions) <- colnames(cibersort_deconv)[seq(2,ncol(cibersort_deconv))]

cibersort_rows <- seq(1,6)
cibersort_deconv <- cell_deconv_data[cibersort_rows,]
cibersort_deconv$cell_type <- vapply(cibersort_deconv$cell_type,function(val){strsplit(val,"_")[[1]][1]},character(1))
rownames(cibersort_deconv) <- cibersort_deconv$cell_type
cibersort_deconv <- cibersort_deconv[,seq(2,ncol(cibersort_deconv))]

cibersort_deconv_scaled <- as.data.frame(t(apply(cibersort_deconv[apply(cibersort_deconv,1,sd)>0,],1,scale)))
colnames(cibersort_deconv_scaled) <- colnames(cibersort_deconv)
Heatmap(cibersort_deconv_scaled,cluster_columns=F,cluster_rows=F,show_row_names=T,show_column_names = T,name="TIMER2.0:zscore")

combining unormalized TPM expression

samples <- c("C1","C2","C3","C5","C6","C7","P1","P2","P3","P5","P6","P7","U1","U2","U3")
for (i in seq(length(samples))){
  if (i==1){
    all_TPM <- read.table(sprintf("F:/Ali_data/HTMBmice_RNAseq_Data/MB01JHU504/MB01JHU504_000_analysis/RSEM/tables/genes/%s_rsem_mm10.genes.results",samples[i]),header=T)
    all_TPM <- all_TPM[,c("gene_id","TPM")]
  } else {
    TPM_temp <- read.table(sprintf("F:/Ali_data/HTMBmice_RNAseq_Data/MB01JHU504/MB01JHU504_000_analysis/RSEM/tables/genes/%s_rsem_mm10.genes.results",samples[i]),header=T)
    all_TPM <- cbind(all_TPM,TPM_temp[,c("TPM")])
    colnames(all_TPM)<-c("gene_id",samples[seq(i)])
  }
}
rownames(all_TPM) <- all_TPM$gene

Loading in the RSEM gene expression and the DESEQ2 data

PD1_vs_Combo_deseq2_file <- "F:/Ali_data/HTMBmice_RNAseq_Data/MB01JHU504/MB01JHU504_000_analysis/DESeq2/refseq_genes/PD1_vs_Combo_genes_DESeq2.txt"
Untreated_vs_Combo_deseq2_file <- "F:/Ali_data/HTMBmice_RNAseq_Data/MB01JHU504/MB01JHU504_000_analysis/DESeq2/refseq_genes/Untreated_vs_Combo_genes_DESeq2.txt"
Untreated_vs_PD1_deseq2_file <- "F:/Ali_data/HTMBmice_RNAseq_Data/MB01JHU504/MB01JHU504_000_analysis/DESeq2/refseq_genes/Untreated_vs_PD1_genes_DESeq2.txt"

PD1_vs_Combo_deseq2_data <- read.table(PD1_vs_Combo_deseq2_file,header=T,sep="\t")
all_sig_genes_tot <- PD1_vs_Combo_deseq2_data$gene_symbol[which(PD1_vs_Combo_deseq2_data$PD1_vs_Combo_pvalue <= 0.05)]

Untreated_vs_Combo_deseq2_data <- read.table(Untreated_vs_Combo_deseq2_file,header=T,sep="\t")
all_sig_genes_tot <- c(all_sig_genes_tot,Untreated_vs_Combo_deseq2_data$gene_symbol[which(Untreated_vs_Combo_deseq2_data$Untreated_vs_Combo_pvalue <= 0.05)])

Untreated_vs_PD1_deseq2_data <- read.table(Untreated_vs_PD1_deseq2_file,header=T,sep="\t")
all_sig_genes_tot <- c(all_sig_genes_tot,Untreated_vs_Combo_deseq2_data$gene_symbol[which(Untreated_vs_Combo_deseq2_data$Untreated_vs_Combo_pvalue <= 0.05)])

all_sig_genes_tot <- unique(all_sig_genes_tot)
binders_summary_filt <- binders_summary[binders_summary$gene %in% all_sig_genes_tot,]
binders_summary_filt_tot <- binders_summary_filt

# reading in gene expression

RSEM_gene_expression_TPM_file <- "F:/Ali_data/HTMBmice_RNAseq_Data/MB01JHU504/MB01JHU504_000_analysis/RSEM/tables/genes/genes_tpm_all_samples_norm.txt"
RSEM_gene_expression_TPM <- read.table(RSEM_gene_expression_TPM_file,sep="\t",header=T)
# RSEM_gene_expression_TPM <- all_TPM
RSEM_gene_expression_TPM_filtered <- RSEM_gene_expression_TPM[RSEM_gene_expression_TPM$gene_id %in% binders_summary_filt$gene,]
sample_cols <- colnames(RSEM_gene_expression_TPM)[seq(2,ncol(RSEM_gene_expression_TPM))]
rownames(RSEM_gene_expression_TPM_filtered)<-RSEM_gene_expression_TPM_filtered$gene_id
RSEM_gene_expression_TPM_filtered <- RSEM_gene_expression_TPM_filtered[,seq(2,ncol(RSEM_gene_expression_TPM_filtered))]

binders_WB_matrix <- as.data.frame(t(matrix(binders_summary_filt$WB, nrow=length(sample_cols), ncol=length(binders_summary_filt$WB), byrow=TRUE)))
colnames(binders_WB_matrix) <- sample_cols
rownames(binders_WB_matrix) <- binders_summary_filt$gene
binders_SB_matrix <- as.data.frame(t(matrix(binders_summary_filt$SB, nrow=length(sample_cols), ncol=length(binders_summary_filt$SB), byrow=TRUE)))
colnames(binders_SB_matrix) <- sample_cols
rownames(binders_SB_matrix) <- binders_summary_filt$gene

RSEM_gene_expression_TPM_filtered <- RSEM_gene_expression_TPM_filtered[rownames(binders_WB_matrix),]

RSEM_scaled <- as.data.frame(t(apply(RSEM_gene_expression_TPM_filtered[apply(RSEM_gene_expression_TPM_filtered,1,sd)>0,],1,scale)))
colnames(RSEM_scaled) <- colnames(RSEM_gene_expression_TPM_filtered)
Heatmap(RSEM_scaled,cluster_columns=F,name="zscore",show_row_names=F,
        left_annotation = rowAnnotation(SB=anno_barplot(binders_summary_filt$SB[apply(RSEM_gene_expression_TPM_filtered,1,sd)>0]),
                                        WB=anno_barplot(binders_summary_filt$WB[apply(RSEM_gene_expression_TPM_filtered,1,sd)>0])),
        top_annotation = HeatmapAnnotation(CD8=anno_barplot(CD8_proportions),
                                           CD4=anno_barplot(CD4_proportions)))

RSEM_binders <- cbind(RSEM_gene_expression_TPM_filtered,binders_summary_filt$SB,binders_summary_filt$WB)
colnames(RSEM_binders) <- c(colnames(RSEM_scaled),"SB","WB")

RSEM_summary <- as.data.frame(vapply(c("combo","PD1","untreated"),function(val){
  if (val=="combo"){
    return(apply(RSEM_gene_expression_TPM_filtered[,seq(1,6)],1,mean))
  } else if (val=="PD1"){
    return(apply(RSEM_gene_expression_TPM_filtered[,seq(7,12)],1,mean))
  } else {
    return(apply(RSEM_gene_expression_TPM_filtered[,seq(13,15)],1,mean))
  }
},numeric(nrow(RSEM_binders))))

RSEM_summary$order_min <- vapply(seq(nrow(RSEM_summary)),function(val){
  expression <- RSEM_summary[val,]
  if (expression[1]<expression[2] & expression[1]<expression[3]){
    return("combo")
  }else if (expression[3]<expression[1] & expression[3]<expression[2]){
    return("untreated")
  } else if (expression[2]<expression[3] & expression[2]<expression[1] ){
    return("PD1")
  } else {
    return("NA")
  }
},character(1))
RSEM_summary$order_max <- vapply(seq(nrow(RSEM_summary)),function(val){
  expression <- RSEM_summary[val,]
  if (expression[1]>expression[2] & expression[1]>expression[3]){
    return("combo")
  }else if (expression[3]>expression[1] & expression[3]>expression[2]){
    return("untreated")
  } else if (expression[2]>expression[3] & expression[2]>expression[1] ){
    return("PD1")
  } else {
    return("NA")
  }
},character(1))
RSEM_summary$FC_combo_pd1 <- vapply(seq(nrow(RSEM_summary)),function(val){
    expression <- RSEM_summary[val,]
    return((as.numeric(expression[1]+1))/as.numeric((expression[2]+1)))
},numeric(1))
RSEM_summary$FC_combo_untreated <- vapply(seq(nrow(RSEM_summary)),function(val){
    expression <- RSEM_summary[val,]
    return((as.numeric(expression[1]+1))/as.numeric((expression[3]+1)))
},numeric(1))
SB<-binders_summary_filt$SB
WB<-binders_summary_filt$WB
RSEM_summary <- cbind(RSEM_summary,SB,WB)

ggplot(RSEM_summary,aes(x=order_min,fill=order_min))+geom_bar()+
  xlab("gene expression")

ggplot(RSEM_summary,aes(x=order_max,fill=order_max))+geom_bar()+
  xlab("gene expression")

ggplot(RSEM_summary,aes(x=log2(FC_combo_pd1)))+
  geom_histogram()+
  xlab("log2(combo TPM / PD1 TPM)")

ggplot(RSEM_summary,aes(x=log2(FC_combo_untreated)))+
  geom_histogram()+
  xlab("log2(combo TPM / untreated TPM)")

ggplot(RSEM_summary,aes(y=SB,x=log2(FC_combo_pd1)))+
  geom_point()+
  xlab("log2(combo TPM / PD1 TPM)")+ylab("SB count")

ggplot(RSEM_summary,aes(y=SB,x=log2(FC_combo_untreated)))+
  geom_point()+
  xlab("log2(combo TPM / untreated TPM)")+ylab("SB count")

ggplot(RSEM_summary,aes(y=WB,x=log2(FC_combo_pd1)))+
  geom_point()+
  xlab("log2(combo TPM / PD1 TPM)")+ylab("WB count")

ggplot(RSEM_summary,aes(y=WB,x=log2(FC_combo_untreated)))+
  geom_point()+
  xlab("log2(combo TPM / untreated TPM)")+ylab("WB count")

PD1_vs_Combo_deseq2_file <- "F:/Ali_data/HTMBmice_RNAseq_Data/MB01JHU504/MB01JHU504_000_analysis/DESeq2/refseq_genes/PD1_vs_Combo_genes_DESeq2.txt"
Untreated_vs_Combo_deseq2_file <- "F:/Ali_data/HTMBmice_RNAseq_Data/MB01JHU504/MB01JHU504_000_analysis/DESeq2/refseq_genes/Untreated_vs_Combo_genes_DESeq2.txt"
Untreated_vs_PD1_deseq2_file <- "F:/Ali_data/HTMBmice_RNAseq_Data/MB01JHU504/MB01JHU504_000_analysis/DESeq2/refseq_genes/Untreated_vs_PD1_genes_DESeq2.txt"

PD1_vs_Combo_deseq2_data <- read.table(PD1_vs_Combo_deseq2_file,header=T,sep="\t")
all_sig_genes <- PD1_vs_Combo_deseq2_data$gene_symbol[which(PD1_vs_Combo_deseq2_data$PD1_vs_Combo_padj <= 0.05)]

Untreated_vs_Combo_deseq2_data <- read.table(Untreated_vs_Combo_deseq2_file,header=T,sep="\t")
all_sig_genes <- c(all_sig_genes,Untreated_vs_Combo_deseq2_data$gene_symbol[which(Untreated_vs_Combo_deseq2_data$Untreated_vs_Combo_padj <= 0.05)])

Untreated_vs_PD1_deseq2_data <- read.table(Untreated_vs_PD1_deseq2_file,header=T,sep="\t")
all_sig_genes <- c(all_sig_genes,Untreated_vs_Combo_deseq2_data$gene_symbol[which(Untreated_vs_Combo_deseq2_data$Untreated_vs_Combo_padj <= 0.05)])

all_sig_genes <- unique(all_sig_genes)

binders_summary_filt <- binders_summary[binders_summary$gene %in% all_sig_genes,]

# reading in gene expression

RSEM_gene_expression_TPM_file <- "F:/Ali_data/HTMBmice_RNAseq_Data/MB01JHU504/MB01JHU504_000_analysis/RSEM/tables/genes/genes_tpm_all_samples_norm.txt"
RSEM_gene_expression_TPM <- read.table(RSEM_gene_expression_TPM_file,sep="\t",header=T)
# RSEM_gene_expression_TPM <- all_TPM
RSEM_gene_expression_TPM_filtered <- RSEM_gene_expression_TPM[RSEM_gene_expression_TPM$gene_id %in% binders_summary_filt$gene,]
sample_cols <- colnames(RSEM_gene_expression_TPM)[seq(2,ncol(RSEM_gene_expression_TPM))]
rownames(RSEM_gene_expression_TPM_filtered)<-RSEM_gene_expression_TPM_filtered$gene_id
RSEM_gene_expression_TPM_filtered <- RSEM_gene_expression_TPM_filtered[,seq(2,ncol(RSEM_gene_expression_TPM_filtered))]

binders_WB_matrix <- as.data.frame(t(matrix(binders_summary_filt$WB, nrow=length(sample_cols), ncol=length(binders_summary_filt$WB), byrow=TRUE)))
colnames(binders_WB_matrix) <- sample_cols
rownames(binders_WB_matrix) <- binders_summary_filt$gene
binders_SB_matrix <- as.data.frame(t(matrix(binders_summary_filt$SB, nrow=length(sample_cols), ncol=length(binders_summary_filt$SB), byrow=TRUE)))
colnames(binders_SB_matrix) <- sample_cols
rownames(binders_SB_matrix) <- binders_summary_filt$gene

RSEM_gene_expression_TPM_filtered <- RSEM_gene_expression_TPM_filtered[rownames(binders_WB_matrix),]

RSEM_scaled <- as.data.frame(t(apply(RSEM_gene_expression_TPM_filtered[apply(RSEM_gene_expression_TPM_filtered,1,sd)>0,],1,scale)))
colnames(RSEM_scaled) <- colnames(RSEM_gene_expression_TPM_filtered)
Heatmap(RSEM_scaled,cluster_columns=F,name="zscore",show_row_names=F,
        left_annotation = rowAnnotation(SB=anno_barplot(binders_summary_filt$SB[apply(RSEM_gene_expression_TPM_filtered,1,sd)>0]),
                                        WB=anno_barplot(binders_summary_filt$WB[apply(RSEM_gene_expression_TPM_filtered,1,sd)>0])),
        top_annotation = HeatmapAnnotation(CD8=anno_barplot(CD8_proportions),
                                           CD4=anno_barplot(CD4_proportions)))

RSEM_binders <- cbind(RSEM_gene_expression_TPM_filtered,binders_summary_filt$SB,binders_summary_filt$WB)
colnames(RSEM_binders) <- c(colnames(RSEM_scaled),"SB","WB")

RSEM_summary <- as.data.frame(vapply(c("combo","PD1","untreated"),function(val){
  if (val=="combo"){
    return(apply(RSEM_gene_expression_TPM_filtered[,seq(1,6)],1,mean))
  } else if (val=="PD1"){
    return(apply(RSEM_gene_expression_TPM_filtered[,seq(7,12)],1,mean))
  } else {
    return(apply(RSEM_gene_expression_TPM_filtered[,seq(13,15)],1,mean))
  }
},numeric(nrow(RSEM_binders))))

RSEM_summary$order_min <- vapply(seq(nrow(RSEM_summary)),function(val){
  expression <- RSEM_summary[val,]
  if (expression[1]<expression[2] & expression[1]<expression[3]){
    return("combo")
  }else if (expression[3]<expression[1] & expression[3]<expression[2]){
    return("untreated")
  } else if (expression[2]<expression[3] & expression[2]<expression[1] ){
    return("PD1")
  } else {
    return("NA")
  }
},character(1))
RSEM_summary$order_max <- vapply(seq(nrow(RSEM_summary)),function(val){
  expression <- RSEM_summary[val,]
  if (expression[1]>expression[2] & expression[1]>expression[3]){
    return("combo")
  }else if (expression[3]>expression[1] & expression[3]>expression[2]){
    return("untreated")
  } else if (expression[2]>expression[3] & expression[2]>expression[1] ){
    return("PD1")
  } else {
    return("NA")
  }
},character(1))
RSEM_summary$FC_combo_pd1 <- vapply(seq(nrow(RSEM_summary)),function(val){
    expression <- RSEM_summary[val,]
    return((as.numeric(expression[1]+1))/as.numeric((expression[2]+1)))
},numeric(1))
RSEM_summary$FC_combo_untreated <- vapply(seq(nrow(RSEM_summary)),function(val){
    expression <- RSEM_summary[val,]
    return((as.numeric(expression[1]+1))/as.numeric((expression[3]+1)))
},numeric(1))
SB<-binders_summary_filt$SB
WB<-binders_summary_filt$WB
RSEM_summary <- cbind(RSEM_summary,SB,WB)

ggplot(RSEM_summary,aes(x=order_min,fill=order_min))+geom_bar()+
  xlab("gene expression")

ggplot(RSEM_summary,aes(x=order_max,fill=order_max))+geom_bar()+
  xlab("gene expression")

ggplot(RSEM_summary,aes(x=log2(FC_combo_pd1)))+
  geom_histogram()+
  xlab("log2(combo TPM / PD1 TPM)")

ggplot(RSEM_summary,aes(x=log2(FC_combo_untreated)))+
  geom_histogram()+
  xlab("log2(combo TPM / untreated TPM)")

ggplot(RSEM_summary,aes(y=SB,x=log2(FC_combo_pd1)))+
  geom_point()+
  xlab("log2(combo TPM / PD1 TPM)")+ylab("SB count")

ggplot(RSEM_summary,aes(y=SB,x=log2(FC_combo_untreated)))+
  geom_point()+
  xlab("log2(combo TPM / untreated TPM)")+ylab("SB count")

ggplot(RSEM_summary,aes(y=WB,x=log2(FC_combo_pd1)))+
  geom_point()+
  xlab("log2(combo TPM / PD1 TPM)")+ylab("WB count")

ggplot(RSEM_summary,aes(y=WB,x=log2(FC_combo_untreated)))+
  geom_point()+
  xlab("log2(combo TPM / untreated TPM)")+ylab("WB count")

Preparing mutations for VAF analysis

MAF_filt <- maf_file_data[maf_row_vals,]
MAF_filt <- cbind(MAF_filt,binders)
MAF_filt <- MAF_filt[MAF_filt$Hugo_Symbol %in% all_sig_genes_tot & (MAF_filt$SB>0 | MAF_filt$WB>0),]
MAF_coords <- MAF_filt[,c("Chromosome","Start_Position")]
MAF_coords$Chromosome <- vapply(MAF_coords$Chromosome,function(val){paste(c("chr",val),collapse="")},character(1))
# write.table(MAF_coords,file="F:/Ali_data/WES_Data_for_4T1MIS/MB01JHU503/MB01JHU503_000_analysis/MAF/mutation_coords.txt",
#             sep="\t",col.names = F,row.names = F,quote=F)

Processing the samtools pileup

calc_vaf <- function(vals){
  pileups <- vals[seq(4,length(vals))]
  target <- vals[3]
  vapply(pileups,function(val){
    a<-strsplit(val,"")[[1]]
    return(length(which(a==target))/length(a))
  },numeric(1))
}

samtools_pileup_file <- "F:/Ali_data/HTMBmice_RNAseq_Data/MB01JHU504/MB01JHU504_000_analysis/RSEM/alignments/all_samples.pileup"
samtools_pileup <- read.table(samtools_pileup_file)
MAF_unique_coords <- unique(MAF_filt[,seq(13)])
MAF_unique_coords$Chromosome <- vapply(MAF_unique_coords$Chromosome,function(val){paste(c("chr",val),collapse="")},character(1))
tumor_alleles <- vapply(seq(nrow(samtools_pileup)),function(row_val){
  chromosome <- samtools_pileup$V1[row_val]
  position <- samtools_pileup$V2[row_val]
  return(MAF_unique_coords[MAF_unique_coords$Chromosome==chromosome & MAF_unique_coords$Start_Position==position,"Tumor_Seq_Allele2"])
},character(1))
samtools_pileup$V3 <- tumor_alleles
samtools_pileup_filt <- samtools_pileup[,c(1,2,3,seq(5,48,3))]

samtools_vaf <- as.data.frame(t(apply(samtools_pileup_filt,1,calc_vaf)))
samtools_vaf <- cbind(samtools_pileup_filt[,seq(2)],samtools_vaf)
colnames(samtools_vaf) <- c("chromosome","position",
                            "C1","C2","C3","C5","C6","C7",
                            "P1","P2","P3","P5","P6","P7",
                            "U1","U2","U3")
samtools_vaf$chromosome <- str_remove(samtools_vaf$chromosome,"chr")
samtools_vaf_extended <- lapply(seq(nrow(samtools_vaf)),function(row_val){
  chrom <- samtools_vaf$chromosome[row_val]
  position <- samtools_vaf$position[row_val]
  maf_data_all_small <- maf_data_all[maf_data_all$Chromosome==chrom & maf_data_all$Start_Position==position,]
  return(cbind(samtools_vaf[row_val,],maf_data_all_small[,seq(ncol(maf_data_all_small)-5,ncol(maf_data_all_small))]))
})
for (i in seq(length(samtools_vaf_extended))){
  if (i==1){
    samtools_vaf_df <- samtools_vaf_extended[[1]]
  } else {
   samtools_vaf_df <- rbind(samtools_vaf_df,samtools_vaf_extended[[i]])
  }
}
samtools_vaf_df <- samtools_vaf_df[samtools_vaf_df$SB>0 | samtools_vaf_df$WB>0,]

samtools_vaf_summary <- unique(samtools_vaf_df[,c(seq(3,17),23)])
samtools_vaf_summary[,c("SB","WB")]<-vapply(samtools_vaf_summary$gene,function(gene){
  SB<-binders_summary_filt_tot[binders_summary_filt_tot$gene==gene,"SB"]
  WB<-binders_summary_filt_tot[binders_summary_filt_tot$gene==gene,"WB"]
  return(c(SB,WB))
},numeric(2))

Heatmap(samtools_vaf_summary[,seq(ncol(samtools_vaf_summary)-4)],cluster_columns=F,name="zscore",show_row_names=F,
        left_annotation = rowAnnotation(SB=anno_barplot(samtools_vaf_summary$SB),
                                        WB=anno_barplot(samtools_vaf_summary$WB)))

samtools_vaf_summary_scaled <- as.data.frame(t(apply(samtools_vaf_summary[apply(samtools_vaf_summary[,seq(ncol(samtools_vaf_summary)-4)],1,sd)>0,seq(ncol(samtools_vaf_summary)-4)],1,scale)))
colnames(samtools_vaf_summary_scaled)<-colnames(samtools_vaf_summary)[seq(ncol(samtools_vaf_summary)-4)]
Heatmap(samtools_vaf_summary_scaled,cluster_columns=F,name="zscore",show_row_names=F,
        left_annotation = rowAnnotation(SB=anno_barplot(samtools_vaf_summary$SB[apply(samtools_vaf_summary[,seq(ncol(samtools_vaf_summary)-4)],1,sd)>0]),
                                        WB=anno_barplot(samtools_vaf_summary$WB[apply(samtools_vaf_summary[,seq(ncol(samtools_vaf_summary)-4)],1,sd)>0])))

ggplot(samtools_vaf_summary,aes(x=C1,y=log2(WB+1)))+geom_point()

ggplot(samtools_vaf_summary,aes(x=C1,y=log2(SB+1)))+geom_point()

ggplot(samtools_vaf_summary,aes(x=P1,y=log2(WB+1)))+geom_point()

ggplot(samtools_vaf_summary,aes(x=P1,y=log2(SB+1)))+geom_point()

ggplot(samtools_vaf_summary,aes(x=U1,y=log2(WB+1)))+geom_point()

ggplot(samtools_vaf_summary,aes(x=U1,y=log2(SB+1)))+geom_point()